import timeit

import pandas as pd
# Load the semicolon-separated city table. This frame gets its CountryCode
# column converted to the category dtype further down.
# Bare expressions such as `df` only render output in an interactive
# session (notebook / REPL); they are harmless no-ops when run as a script.
df = pd.read_csv('city.csv', sep=';')
df

# Baseline memory footprint, shallow and deep. The deep variants also measure
# the contents of object-dtype columns instead of only the array buffers.
df.info()
df.info(memory_usage='deep')
df.memory_usage()
df.memory_usage(deep=True)

# Re-encode CountryCode as a categorical and repeat the deep measurement so
# the before/after numbers can be compared.
df['CountryCode'] = df['CountryCode'].astype('category')
df.info(memory_usage='deep')
df.memory_usage(deep=True)

# Second, unconverted copy of the same file: the reference for the timing below.
df2 = pd.read_csv('city.csv', sep=';')
df2
df2.info(memory_usage='deep')

# Time the same groupby on both frames. The original used the `%%timeit`
# cell magic, which is only valid as the first line of a notebook cell;
# timeit.Timer.autorange() picks the loop count itself and runs anywhere.
# observed=True only affects categorical groupers. Every category here comes
# from the data, so the result matches the default, but being explicit avoids
# the FutureWarning that newer pandas versions raise for categorical keys.
plain_loops, plain_seconds = timeit.Timer(
    lambda: df2.groupby('CountryCode', observed=True).count()
).autorange()
print(f'groupby, CountryCode as read from CSV: '
      f'{plain_seconds / plain_loops * 1e3:.3f} ms per loop ({plain_loops} loops)')

cat_loops, cat_seconds = timeit.Timer(
    lambda: df.groupby('CountryCode', observed=True).count()
).autorange()
print(f'groupby, CountryCode as category: '
      f'{cat_seconds / cat_loops * 1e3:.3f} ms per loop ({cat_loops} loops)')

# Inspect the categorical encoding: the integer code stored per row and the
# lookup table of distinct category labels.
df.CountryCode.cat.codes
df.CountryCode.cat.categories
# Small demo frame: one (name, mark) record per student.
df3 = pd.DataFrame(
    [
        ('John', 'good'),
        ('Jack', 'excellent'),
        ('Katy', 'bad'),
        ('Paul', 'middle'),
        ('Susan', 'good'),
    ],
    columns=['name', 'mark'],
)
df3
def get_mark(v):
    """Translate a textual mark into its numeric rating.

    Args:
        v: The mark label to convert; compared against the known labels
            with ``==``.

    Returns:
        4 for 'good', 5 for 'excellent', 2 for 'bad', and 3 for anything
        else (including 'middle').
    """
    if v == 'good':
        return 4
    if v == 'excellent':
        return 5
    if v == 'bad':
        return 2
    # 'middle' and every unrecognised label share the fallback rating.
    return 3
# Derive the numeric rating by running get_mark over every mark label.
df3['rate'] = df3['mark'].apply(get_mark)
df3
# Keep only the rows whose rating is above 3.
df3.loc[df3['rate'] > 3]
# Rebuild the demo frame from scratch so this step starts without 'rate'.
df3 = pd.DataFrame(dict(
    name=['John', 'Jack', 'Katy', 'Paul', 'Susan'],
    mark=['good', 'excellent', 'bad', 'middle', 'good'],
))
df3
# Same label -> rating translation, expressed as a lookup table for Series.map.
d_mark = dict(good=4, excellent=5, bad=2, middle=3)
df3['rate'] = df3['mark'].map(d_mark)
df3
# Rebuild the demo frame once more, one (name, mark) record per student.
df3 = pd.DataFrame(
    [
        ('John', 'good'),
        ('Jack', 'excellent'),
        ('Katy', 'bad'),
        ('Paul', 'middle'),
        ('Susan', 'good'),
    ],
    columns=['name', 'mark'],
)
df3
# Give the marks an explicit order (worst -> best) so that comparisons,
# sorting and max() follow that ranking rather than alphabetical order.
df3['mark'] = df3['mark'].astype(
    pd.CategoricalDtype(['bad', 'middle', 'good', 'excellent'], ordered=True)
)
df3.info()
df3['mark']
# Rows ranked strictly above 'middle', best mark first.
df3[df3['mark'] > 'middle'].sort_values(by='mark', ascending=False)
df3['mark'].max()